#需要的库
import requests as rq
from bs4 import BeautifulSoup as bs
import time
import pandas as pd

# Scrape the Maoyan ranking board page by page and export the rows to Excel.
# Pages are addressed through the ``offset`` query parameter in steps of 10.

# Minimal browser-like User-Agent sent with every request.
header = {
    "User-Agent": "Mozilla/5.0"
    }

data = []  # one row per movie: [rank, name, actors, releasetime, score, pic_url]

n = 1  # running rank, continues across pages

page_count = 10
for j in range(page_count):
    url = f"https://maoyan.com/board/4?offset={j*10}"   # offset drives pagination
    # A timeout keeps a stalled connection from hanging the script forever,
    # and raise_for_status() stops us from parsing an HTTP error page.
    response = rq.get(url, headers=header, timeout=10)
    response.raise_for_status()
    soup = bs(response.content, "html.parser")
    movies = soup.find_all("dd")      # each <dd> is treated as one movie entry
    for i in movies:
        name = i.select_one(".name").text
        # Drop the 3-character label prefix, then split the cast on commas.
        actors = i.select_one(".star").text.strip()[3:].split(",")
        # Drop the 5-character label prefix in front of the release date.
        releasetime = i.select_one(".releasetime").text[5:]
        # Both score parts must come from the *current* entry; reading the
        # fraction from movies[0] gave every movie the first movie's decimal.
        score = i.select_one(".integer").text + i.select_one(".fraction").text
        # Read the poster URL from the tag attribute instead of slicing the
        # tag's string form, which depended on attribute order and on the alt
        # text containing no spaces. The original slice offsets imply the URL
        # lives in ``data-src`` and carries an "@<size>" suffix, removed here.
        poster = i.select_one(".board-img")
        pic_url = poster.get("data-src", "").split("@")[0] if poster is not None else ""

        row = [n, name, actors, releasetime, score, pic_url]
        data.append(row)
        print(row)
        n += 1
    # Pause between requests; no need to wait after the final page.
    if j < page_count - 1:
        time.sleep(5)


# Save to an Excel workbook.
index = ['排名','名字','主演','上映时间','评分','电影海报图片']  # column headers
# Naming the columns on the DataFrame (rather than via ``header=``) writes the
# same sheet and still yields a valid header-only file if nothing was scraped.
data = pd.DataFrame(data, columns=index)
data.to_excel('猫眼电影榜单数据.xlsx', index=False)